package com.zxl.grocery.modular.crawler.processor;

import com.zxl.grocery.modular.crawler.base.AbstractPageProcessor;
import com.zxl.grocery.modular.crawler.base.Constant;
import com.zxl.grocery.modular.crawler.entity.CrawlerFunny;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Page;

/**
 * info:段子哥
 * Created by Lzx on 2017/7/12.
 */
public class DuanZiGeProcessor extends AbstractPageProcessor {

    public static final String list = "http://www.duanzige.cn/list-2-\\w+.html";

    @Override
    public String[] initTargetUrl() {
        return new String[]{"http://www.duanzige.cn/list-2-1.html"};
    }

    @Override
    public void process(Page page) {
        if (page.getUrl().regex(list).match()) {
            page.addTargetRequests(page.getHtml().xpath("//div[@class='xiaohua-data']/h3").links().all());
        } else {
            CrawlerFunny funny = new CrawlerFunny();
            funny.setTitle(page.getHtml().xpath("//div[@class='xiaohua-data']/h1/text()").toString());
            funny.setContent(page.getHtml().xpath("//div[@class='xiaohua-data']/div[@class='content']/p/text()").toString());
            funny.setSourcesId(Constant.Sources_DuanZiGe);
            if (StringUtils.isNotEmpty(funny.getTitle()) && StringUtils.isNotEmpty(funny.getContent())) {
                page.putField("funny", funny);
            }
        }

    }

}
